Given a pair of merging sequences A, B and a target sequence T, the merged longest common subsequence (MLCS) problem is to find out the longest common subsequence (LCS) between sequences E ( A, B ) and T, where E ( A, B ) is obtained from merging two subsequences of A and B. In this paper, we first propose an algorithm for solving the MLCS problem in O (n|Σ| + (r − L + 1)Lm) time and O (n|Σ| + Lm) space, where r and L denote the lengths of T and MLCS, respectively, and m and n denote the shorter and longer lengths of A and B , respectively. From the time complexity, it is clear that our algorithm is very efficient when T and E ( A, B ) are very similar. With slight modification, our algorithm can also solve another merged LCS problem variant, the block-merged LCS (BMLCS) problem, in O (n|Σ| + (r − L + 1)Lδ) time and O (n|Σ| + Lδ) space, where δ denotes the larger number of blocks of A and B. Experimental results show that our algorithms are faster than other previously published MLCS and BMLCS algorithms for sequences with high similarities. The source codes and datasets for experiments can be found on our web site http://par.cse.nsysu.edu.tw/~mlcs/ [20].
class Btriple { public: int x; int y; int z; Btriple(int posA, int posB, int posT) { x = posA; y = posB; z = posT; } }; int BMLCS_nakatzu(vector< int> T, vector< int> A, vector< int>B, int Tlen, int Alen, int Blen, int size, vector< int> EOBA, vector< int> EOBB) { vector< vector< int>> nextA(size + 1, vector< int>(Alen + 1, 0)); vector< vector< int>> nextB(size + 1, vector< int>(Blen + 1, 0)); vector< int> PosBlockA(Alen + 1, 0); vector< int> PosBlockB(Blen + 1, 0); vector< int> FEOBA(Alen + 1, 0); vector< int> FEOBB(Blen + 1, 0); int MaxLen = 0; vector< vector< Btriple>> QL(Tlen + 2, vector< Btriple>()); vector< vector< Btriple>> tempQL(Tlen + 2, vector< Btriple>()); int qlsize = 0; int locationA = 0; int locationB = 0; //construct array of nextA for (int i = 1; i <= size; ++i) { locationA = Alen + 1; for (int j = Alen; j > 0; --j) { if (j == Alen) nextA[i][j] = locationA; if (A[j] == i) { locationA = j; nextA[i][j - 1] = locationA; } else { nextA[i][j - 1] = locationA; } } } //construct array of nextB for (int i = 1; i <= size; ++i) { locationB = Blen + 1; for (int j = Blen; j > 0; --j) { if (j == Blen) nextB[i][j] = locationB; if (B[j] == i) { locationB = j; nextB[i][j - 1] = locationB; } else { nextB[i][j - 1] = locationB; } } } //construct PosA and PosB for (int i = 0, count = 0; i < Alen; ++i) { if (i <= EOBA[count]) { PosBlockA[i] = EOBA[count]; } else { ++count; PosBlockA[i] = EOBA[count]; } } PosBlockA[Alen] = EOBA[EOBA.size() - 1]; for (int i = 0, count = 0; i < Blen; ++i) { if (i <= EOBB[count]) { PosBlockB[i] = EOBB[count]; } else { ++count; PosBlockB[i] = EOBB[count]; } } PosBlockB[Blen] = EOBB[EOBB.size() - 1]; //construct FEOBA and FEOBB int sizeEOBA = EOBA.size(), sizeEOBB = EOBB.size(); for (int i = 0; i < sizeEOBA; ++i) { FEOBA[EOBA[i]] = 1; } FEOBA[0] = 1; FEOBA[Alen] = 1; for (int i = 0; i < sizeEOBB; ++i) { FEOBB[EOBB[i]] = 1; } FEOBB[0] = 1; FEOBB[Blen] = 1; //main algorithm QL[0].push_back(Btriple(0, 0, 0)); int PosA = 0; int PosB = 0; int PosT = 0; int LocalLen = 0; bool f = false; int TurnLen = 0; int tmpti = 0; for (int i = 1; i <= Tlen; ++i) { if (i > Tlen - MaxLen) break; //PosT = i; LocalLen = 0; tmpti = Tlen - i; for (int j = 0; j <= tmpti; ++j) { f = false; TurnLen = LocalLen; PosT = i + j; for (auto mytriple = QL[TurnLen].begin(); mytriple != QL[TurnLen].end(); ++mytriple) { //if (PosT > mytriple->z){ PosA = nextA[T[PosT]][mytriple->x]; PosB = nextB[T[PosT]][mytriple->y]; if ((PosA <= Alen || PosB <= Blen) && f == false) { ++LocalLen; f = true; } //A[i] and B[j] are end of block. if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 1) { if (PosA <= Alen) { QL[LocalLen].push_back(Btriple(PosA, mytriple->y, PosT)); } if (PosB <= Blen) { QL[LocalLen].push_back(Btriple(mytriple->x, PosB, PosT)); } } //A[i] is EOB, but B[j] is not. else if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 0) { if (PosB <= Blen) { QL[LocalLen].push_back(Btriple(mytriple->x, PosB, PosT)); } if (PosB > PosBlockB[mytriple->y] && PosA <= Alen) { QL[LocalLen].push_back(Btriple(PosA, PosBlockB[mytriple->y], PosT)); } } //B[j] is EOB, but A[i] is not. else if (FEOBA[mytriple->x] == 0 && FEOBB[mytriple->y] == 1) { if (PosA <= Alen) { QL[LocalLen].push_back(Btriple(PosA, mytriple->y, PosT)); } if (PosA > PosBlockA[mytriple->x] && PosB <= Blen) { QL[LocalLen].push_back(Btriple(PosBlockA[mytriple->x], PosB, PosT)); } } //} } if (LocalLen > MaxLen) { MaxLen = LocalLen; } //2-D minima algorithm qlsize = QL[LocalLen].size(); if (qlsize > 1 && f == true) { for (int i = 0; i < qlsize; ++i) { Btriple temptri = QL[LocalLen][i]; if (!tempQL[temptri.x].empty() && temptri.y < tempQL[temptri.x][0].y) { tempQL[temptri.x].erase(tempQL[temptri.x].begin()); tempQL[temptri.x].push_back(temptri); } else if (tempQL[temptri.x].empty() == true) { tempQL[temptri.x].push_back(temptri); } } QL[LocalLen].clear(); int prevMaxY = Blen + 1; for (int i = 0; i <= Alen; ++i) { if (!tempQL[i].empty()) { if (tempQL[i][0].y < prevMaxY) { prevMaxY = tempQL[i][0].y; QL[LocalLen].push_back(tempQL[i][0]); } tempQL[i].clear(); } } } ++PosT; if (PosT > Tlen) break; if (f == false && QL[TurnLen + 1].empty() == true) { break; } if (f == false && QL[TurnLen + 1].empty() == false) { ++LocalLen; } } } return MaxLen; }